home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
- /* ./glimpse/index/printx.c */
- /* Just to read the index by saying "printx < .glimpse_index | more" */
- /*
- * To compile it just say "cc -o -I../agrep printx.c" and then say
- * "cc -o printx printx.c io.o", provided io.c has already been made.
- */
- #include <stdio.h>
- #include "glimpse.h"
-
- /*
- * This stuff helps us use io.c directly without copying stuff.
- * get_set() had to be copied from glimpse since get_index.c
- * has too much irrelevant stuff.
- */
-
- int memory_usage; /* not referenced anywhere in this file; presumably a symbol io.c expects -- TODO confirm */
- char INDEX_DIR[MAX_LINE_LEN]; /* not referenced anywhere in this file; presumably a symbol io.c expects -- TODO confirm */
-
- unsigned char dest_index_buf[REAL_INDEX_BUF]; /* not referenced anywhere in this file */
- unsigned char src_index_buf[REAL_INDEX_BUF]; /* line buffer: main() fills it with fgets() and passes it to get_set() */
-
- int num_filter; /* this and the three filter tables below are not referenced in this file; presumably io.c symbols -- TODO confirm */
- int filter_len[MAX_FILTER];
- CHAR *filter[MAX_FILTER];
- CHAR *filter_command[MAX_FILTER];
-
/*
 * Allocation hook: hands the request straight to malloc().
 *
 * size: number of bytes wanted.
 * Returns a pointer to the new storage, or NULL when the request is
 * negative or malloc() fails.  The caller releases it with my_free().
 *
 * NOTE(review): malloc() must be declared before this point (normally via
 * <stdlib.h>); this file does not include it directly -- confirm that
 * glimpse.h does.
 */
char *
my_malloc(int size)
{
	/* A negative int would silently become an enormous size_t. */
	if (size < 0) {
		return NULL;
	}
	return malloc((size_t) size);
}
-
/*
 * Deallocation hook: releases storage obtained from my_malloc().
 *
 * ptr: block to release; NULL is accepted and ignored by free().
 * Always returns 0.  free() itself yields no value, so the int result
 * exists only to keep the function's declared return type.
 */
int
my_free(char *ptr)
{
	free(ptr);
	return 0;
}
-
/*
 * Stub for memagrep_search(): ignores every argument and returns 0.
 *
 * The parameters keep the int type they had implicitly in the old-style
 * definition; only the types are now spelled out, since implicit int is
 * no longer valid C.
 */
int
memagrep_search(int len, int pat, int lenbuf, int buf, int lenout, int out)
{
	(void) len;
	(void) pat;
	(void) lenbuf;
	(void) buf;
	(void) lenout;
	(void) out;
	return 0;
}
-
- /* Returns 1 if it is a Universal set, 0 otherwise. Constraint: WORD_END_MARK/ALL_INDEX_MARK must occur at or after buffer[0] */
- get_set(buffer, partfp, OneFilePerBlock, StructuredIndex, ByteLevelIndex)
- unsigned char *buffer;
- FILE *partfp;
- int OneFilePerBlock;
- {
- int bdx2, j;
- int ret;
- int x=0, y=0, diff, even_words=1, prevy;
- int indexattr = 0;
- int delim = encode8b(0);
- int curfreq = 0;
-
- /* buffer[0] is '\n', search must start from buffer[1] */
- if (StructuredIndex) {
- if (StructuredIndex < MaxNum8bPartition - 1) {
- indexattr = decode8b(buffer[1]);
- bdx2 = 2;
- }
- else if (StructuredIndex < MaxNum16bPartition - 1) {
- indexattr = decode16b((buffer[1] << 8) | buffer[2]);
- bdx2 = 3;
- }
- else {
- indexattr = decode32b((buffer[1] << 24) | (buffer[2] << 16) | (buffer[3] << 8) | (buffer[4]));
- bdx2 = 5;
- }
- printf("%d: ", indexattr);
- }
- else bdx2 = 1;
-
- if (OneFilePerBlock) while((bdx2<REAL_INDEX_BUF+1) && (buffer[bdx2] != WORD_END_MARK) && (buffer[bdx2] != ALL_INDEX_MARK)) {
- if (isalnum(buffer[bdx2])) putchar(buffer[bdx2]);
- bdx2 ++;
- }
- else while((bdx2<REAL_INDEX_BUF+1) && (buffer[bdx2] != WORD_END_MARK)) {
- if (isalnum(buffer[bdx2])) putchar(buffer[bdx2]);
- bdx2 ++;
- }
- if (bdx2 >= REAL_INDEX_BUF+1) {
- printf("error\n");
- return 0;
- }
-
- printf(": ");
- if (OneFilePerBlock && (buffer[bdx2] == ALL_INDEX_MARK)) {
- printf("stop list\n");
- return 1;
- }
- bdx2++; /* bdx2 now points to the first byte of the offset */
-
- even_words = 1;
- /* Code identical to that in merge_in() in glimpseindex */
- if (OneFilePerBlock) {
- get_block_numbers(&buffer[bdx2], &buffer[bdx2], partfp);
- while((bdx2<REAL_INDEX_BUF) && (buffer[bdx2] != '\n') && (buffer[bdx2] != '\0')) {
- /* First get the file name */
- x = 0;
- if (ByteLevelIndex) {
- if (OneFilePerBlock <= MaxNum8bPartition) {
- x = decode8b(buffer[bdx2]);
- bdx2 ++;
- }
- else {
- x = (buffer[bdx2] << 8) | buffer[bdx2+1];
- x = decode16b(x);
- bdx2 += 2;
- }
- }
- else if (OneFilePerBlock <= MaxNum8bPartition) {
- x = decode8b(buffer[bdx2]);
- bdx2 ++;
- }
- else if (OneFilePerBlock <= MaxNum12bPartition) {
- if (even_words) {
- x = ((buffer[bdx2+1] & 0x0000000f) << 8) | buffer[bdx2];
- x = decode12b(x);
- bdx2 += 2;
- even_words = 0;
- }
- else { /* odd number of words so far */
- x = ((buffer[bdx2-1] & 0x000000f0) << 4) | buffer[bdx2];
- x = decode12b(x);
- bdx2 ++;
- even_words = 1;
- }
- }
- else if (OneFilePerBlock <= MaxNum16bPartition) {
- x = (buffer[bdx2] << 8) | buffer[bdx2+1];
- x = decode16b(x);
- bdx2 += 2;
- }
- printf("%d ", x);
-
- prevy = 0;
- if (ByteLevelIndex) {
- printf("[ ");
- while ((bdx2<REAL_INDEX_BUF) && (buffer[bdx2] != '\n') && (buffer[bdx2] != '\0')) {
- y = decode8b(buffer[bdx2]);
- if ((y & 0x000000c0) == 0) { /* one byte offset */
- diff = y&0x0000003f;
- y = prevy + diff;
- bdx2 ++;
- }
- else if ((y & 0x000000c0) == 0x40) { /* two byte offset */
- diff = decode8b(buffer[bdx2+1]);
- y = prevy + (((y & 0x0000003f) * MaxNum8bPartition) + diff);
- bdx2 += 2;
- }
- else if ((y & 0x000000c0) == 0x80) { /* three byte offset */
- diff = decode16b((buffer[bdx2+1] << 8) | buffer[bdx2+2]);
- y = prevy + (((y & 0x0000003f) * MaxNum16bPartition) + diff);
- bdx2 += 3;
- }
- else { /* four byte offset */
- diff = decode24b((buffer[bdx2+1] << 16) | (buffer[bdx2+2] << 8) | buffer[bdx2+3]);
- y = prevy + (((y & 0x0000003f) * MaxNum24bPartition) + diff);
- bdx2 += 4;
- }
- prevy = y;
- printf("%d ");
- if ((bdx2<REAL_INDEX_BUF) && (buffer[bdx2] == delim)) { /* look at offsets corr. to a new file now */
- bdx2 ++;
- break;
- }
- }
- printf("] ");
- }
- }
- }
- else {
- while((bdx2<MAX_INDEX_BUF) && (buffer[bdx2] != '\n') && (buffer[bdx2] != '\0') && (buffer[bdx2] < MAX_PARTITION)) {
- printf("%d ", buffer[bdx2++]);
- }
- }
- printf("\n");
-
- return 0;
- }
-
- main(argc, argv)
- int argc;
- char *argv[];
- {
- int c;
- int in_word = 1;
- char s[MAX_LINE_LEN];
- FILE *i_in = stdin;
- FILE *p_in;
- int j, wordoffset, index, pat_size;
- char indexnumberbuf[MAX_NAME_LEN];
- int indexnumber, onefileperblock, structuredindex, bytelevelindex;
-
- if (argc <= 1) goto oldprintx;
-
- sprintf(s, "%s/%s", argv[1], ".glimpse_index");
- if ((i_in = fopen(s, "r")) == NULL) {
- fprintf(stderr, "cannot find %s\n", s);
- exit(1);
- }
- sprintf(s, "%s/%s", argv[1], ".glimpse_partitions");
- if ((p_in = fopen(s, "r")) == NULL) {
- fprintf(stderr, "cannot find %s\n", s);
- exit(1);
- }
-
- /* modified the original in glimpse's main.c */
- fgets(indexnumberbuf, 256, i_in);
- if(strstr(indexnumberbuf, "1234567890"))
- printf("indexed numbers\n");
- else printf("not indexed numbers\n");
- fscanf(i_in, "%%%d\n", &onefileperblock);
- bytelevelindex = 0;
- if (onefileperblock < 0) {
- bytelevelindex = 1;
- onefileperblock = -onefileperblock;
- printf("byte level index\n");
- }
- else if (onefileperblock > 0) printf("file level index\n");
- else printf("block level index\n");
- fscanf(i_in, "%%%d\n", &structuredindex);
- if (structuredindex <= 0) printf("attributes = 0\n");
- else printf("attributes = %d\n", structuredindex);
-
- src_index_buf[0] = src_index_buf[REAL_INDEX_BUF - 1] = '\n';
- while (fgets(&src_index_buf[1], REAL_INDEX_BUF-1, i_in)) {
- get_set(src_index_buf, p_in, onefileperblock, structuredindex, bytelevelindex);
- src_index_buf[0] = src_index_buf[REAL_INDEX_BUF - 1] = '\n';
- }
- fclose(i_in);
- fclose(p_in);
- exit(0);
-
- oldprintx:
- while((c = getc(i_in)) != EOF) {
- if(in_word) {
- if(c == 2) {
- in_word = 0;
- printf(": ");
- }
- else if (c==3) {
- in_word = 0;
- printf("= ");
- }
- else if (isalnum(c)) printf("%c", c);
- else printf("<%d>", c);
- }
- else {
- if(c == '\n') {
- in_word = 1;
- putchar('\n');
- }
- else printf(" %d", c);
- }
- }
- }
-